import csv
import time
from urllib.parse import urljoin

import pymysql
import requests
from lxml import etree

class HhSpider(object):
    """Scraper for www.shpl.com.cn.

    Fetches the home page, follows the first link under the page's
    "tab" section, and appends every listing entry (title, detail URL)
    to ``data.csv``.
    """

    def __init__(self):
        # Start at the site root; parse_html() advances this to the
        # first tab's listing page.
        self.url = 'http://www.shpl.com.cn/'
        self.html = ''  # replaced by the parsed lxml tree after get_html()
        self.headers = {
            'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'
        }

    def get_html(self):
        """Download ``self.url`` and parse it into an lxml element tree."""
        time.sleep(1)  # be polite: throttle by one second before hitting the site
        response = requests.get(self.url, headers=self.headers, timeout=10)
        response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
        self.html = etree.HTML(response.text)

    def parse_html(self):
        """Find the first tab link on the home page and scrape its listing page."""
        hrefs = self.html.xpath('//div[@class="tab"]/span/a/@href')
        if not hrefs:
            # A clear error beats the bare IndexError the old [0] produced.
            raise ValueError('no tab link found on %s' % self.url)
        # urljoin handles relative and absolute hrefs alike and avoids the
        # doubled slash that plain string concatenation could produce.
        self.url = urljoin(self.url, hrefs[0])
        self.html_content(url=self.url)

    def html_content(self, url):
        """Scrape the listing page at *url* and append (title, link) rows to data.csv."""
        response = requests.get(url=url, headers=self.headers, timeout=10)
        response.raise_for_status()
        self.html = etree.HTML(response.text)

        contents = self.html.xpath('//body/div/div[1]/div/div[2]/div[2]/div/ul/li/a/text()')
        details = self.html.xpath('//body/div/div[1]/div/div[2]/div[2]/div/ul/li/a/@href')

        # Open the file ONCE (the original re-opened it per row) and pass
        # newline='' so the csv module controls line endings — without it,
        # Windows output gets a blank line between every row.
        with open('data.csv', 'a', encoding='gbk', newline='') as csvfile:
            writer = csv.writer(csvfile)
            # zip() pairs titles with hrefs and stops at the shorter list,
            # so a length mismatch can no longer raise IndexError.
            for content, detail in zip(contents, details):
                detail_url = urljoin('http://www.shpl.com.cn/', detail)
                print(content, detail_url)
                # NOTE(review): values are wrapped in literal single quotes,
                # matching the original output — presumably prepared for a
                # later SQL import; confirm before changing the format.
                writer.writerow([f"'{content}'", f"'{detail_url}'"])

    def run(self):
        """Run the full pipeline on this instance.

        The original built a second HhSpider here and then called
        html_content() with the outer instance's unmodified base URL,
        re-fetching the homepage a third time and scraping the wrong
        page. parse_html() already triggers html_content(), so the
        pipeline is exactly these two calls.
        """
        self.get_html()
        self.parse_html()


# Run the scraper only when executed as a script, never on import.
if __name__ == '__main__':
    spider = HhSpider()
    spider.run()